package net.bwie.realtime.jtp.dws.douyin.log.utils;

import com.huaban.analysis.jieba.JiebaSegmenter;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

/**
 * Static helpers for segmenting Chinese text into word tokens, backed by two
 * third-party segmenters: huaban jieba-analysis and IK Analyzer.
 */
public final class AnalyzerUtil {

    /**
     * Strips punctuation ({@code \pP}), CJK quote characters and whitespace
     * before segmentation. Compiled once instead of on every call.
     * NOTE(review): the '+' inside the character class is a literal plus sign
     * (so '+' characters are removed too), not a quantifier — confirm intended.
     */
    private static final Pattern CLEANUP = Pattern.compile("[\\pP‘’“”\\s+]");

    /** Utility class — not instantiable. */
    private AnalyzerUtil() {
    }

    /**
     * Segments Chinese text with the Jieba segmenter after removing
     * punctuation, quote characters and whitespace.
     *
     * @param content text to segment; must not be {@code null}
     * @return the list of word tokens produced by Jieba
     */
    public static List<String> jiebaAnalyzer(String content) {
        JiebaSegmenter segmenter = new JiebaSegmenter();
        return segmenter.sentenceProcess(CLEANUP.matcher(content).replaceAll(""));
    }

    /**
     * Segments Chinese text with the IK Analyzer.
     *
     * @param content text to segment; must not be {@code null}
     * @return the list of word tokens produced by IK
     * @throws Exception if the underlying IK segmenter fails while reading
     */
    public static List<String> ikAnalyzer(String content) throws Exception {
        List<String> tokens = new ArrayList<>();
        // Second argument 'true' selects IK's smart (coarse-grained) mode.
        IKSegmenter segmenter = new IKSegmenter(new StringReader(content), true);
        Lexeme lexeme;
        while ((lexeme = segmenter.next()) != null) {
            tokens.add(lexeme.getLexemeText());
        }
        return tokens;
    }

    /** Ad-hoc demo: prints the IK segmentation of a sample sentence. */
    public static void main(String[] args) throws Exception {
        List<String> tokens = ikAnalyzer("你好呀，你多大了？");
        // Arrays.toString(list.toArray()) produces the same "[a, b, c]" text
        // as List#toString; kept for identical console output.
        System.out.println(Arrays.toString(tokens.toArray()));
    }

}
